{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "a98030af-fcd1-4d63-a36e-38ba053498fa",
   "metadata": {},
   "source": [
    "# A Small Tweak to Week1-Day5\n",
    "\n",
    "If you have network restrictions (such as using a custom DNS provider, or firewall rules at work), you can disable SSL cert verification.\n",
    "Once you do that and start executing your code, the output will be riddled with warnings. Thankfully, you can suppress those warnings,too.\n",
    "\n",
    "See the 2 lines added to the init method, below."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "106dd65e-90af-4ca8-86b6-23a41840645b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# A class to represent a Webpage\n",
    "\n",
    "# Some websites need you to use proper headers when fetching them:\n",
    "headers = {\n",
    " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
    "}\n",
    "\n",
    "class Website:\n",
    "    \"\"\"\n",
    "    A utility class to represent a Website that we have scraped, now with links\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, url):\n",
    "        self.url = url\n",
    "\n",
    "        #\n",
    "        # If you must disable SSL cert validation, and also suppress all the warning that will come with it,\n",
    "        # add the 2 lines below. This comes in very handy if you have DNS/firewall restrictions; alas, use\n",
    "        # with caution, especially if deploying this in a non-dev environment.\n",
    "        requests.packages.urllib3.disable_warnings() \n",
    "        response = requests.get(url, headers=headers, verify=False)        \n",
    "        # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
    "        \n",
    "        self.body = response.content\n",
    "        soup = BeautifulSoup(self.body, 'html.parser')\n",
    "        self.title = soup.title.string if soup.title else \"No title found\"\n",
    "        if soup.body:\n",
    "            for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
    "                irrelevant.decompose()\n",
    "            self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
    "        else:\n",
    "            self.text = \"\"\n",
    "        links = [link.get('href') for link in soup.find_all('a')]\n",
    "        self.links = [link for link in links if link]"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
